\relax 
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}}
\citation{Lloyd}
\citation{ensem2008}
\citation{ensem2009}
\citation{Andrew}
\@writefile{toc}{\contentsline {section}{\numberline {II}Preliminaries}{2}}
\newlabel{sec:preliminaries}{{II}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-A}}Clustering Problem}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-B}}Ensemble $K$-means algorithm}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-C}}Index Based Ensemble $K$-means}{2}}
\newlabel{sec:kdtree}{{\unhbox \voidb@x \hbox {II-C}}{2}}
\citation{cuda}
\citation{oclspec}
\citation{oclprgramming}
\citation{oclbestpractices}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces CPU vs GPU Architecture}}{3}}
\newlabel{fig:cpugpucomparison}{{1}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-D}} OpenCL Device Architecture}{3}}
\newlabel{sec:gpuArch}{{\unhbox \voidb@x \hbox {II-D}}{3}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Coalesced vs Non-coalesced Access}}{3}}
\newlabel{fig:coalesced}{{2}{3}}
\citation{Shalom2008}
\citation{Hall2004}
\citation{Cao}
\citation{darjen}
\citation{darjen1}
\citation{darjen2}
\citation{Cao}
\citation{Che2008}
\citation{Li}
\citation{Fang2008}
\citation{Debunk}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Bank Conflicts in Local memory}}{4}}
\newlabel{fig:bankconflict}{{3}{4}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces OpenCL compute device}}{4}}
\newlabel{fig:computeDevice}{{4}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-E}}Related Work}{4}}
\@writefile{toc}{\contentsline {section}{\numberline {III}Parallel $K$-means Algorithm}{4}}
\newlabel{sec:parallelKMeans}{{III}{4}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces General Algorithm}}{4}}
\newlabel{alg:genAlg}{{1}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-A}}Parallelization Issues}{4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {III-A}1}Concurrency}{4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {III-A}2}Memory constraints}{4}}
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Parallelization Methodologies}}{5}}
\newlabel{table}{{I}{5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {III-A}3}Synchronization and atomic operations}{5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {III-A}4}Data transfer between host and device}{5}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Task Parallelism}}{5}}
\newlabel{fig:TaskParallelism}{{5}{5}}
\@writefile{toc}{\contentsline {section}{\numberline {IV}OpenCL Implementation}{5}}
\newlabel{sec:oclimp}{{IV}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-A}}Task Parallelism}{5}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces Task Parallelism}}{6}}
\newlabel{alg:taskP}{{2}{6}}
\newlabel{tcomputation}{{1}{6}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}Computation phase of task parallelism}{6}}
\newlabel{twriteback}{{2}{6}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}Write back phase of task parallelism}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-B}}Data Parallelism}{6}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Data Parallelism}}{7}}
\newlabel{figDataParallelism}{{6}{7}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces Data Parallelism}}{7}}
\newlabel{alg:dataP}{{3}{7}}
\newlabel{dcomputation}{{3}{7}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {3}Computation phase of data parallelism}{7}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Concatenated Parallelism}}{7}}
\newlabel{fig:ConcatenatedParallelism}{{7}{7}}
\newlabel{dwriteback}{{4}{7}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {4}Write back phase of data parallelism}{7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-C}}Concatenated Parallelism}{7}}
\newlabel{sec:concat}{{\unhbox \voidb@x \hbox {IV-C}}{7}}
\citation{Alsabti}
\@writefile{loa}{\contentsline {algorithm}{\numberline {4}{\ignorespaces Concatenated Parallelism}}{8}}
\newlabel{alg:concatP}{{4}{8}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Local memory atomic update}}{8}}
\newlabel{fig:highcontention}{{8}{8}}
\newlabel{ccomputation}{{5}{8}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {5}Computation phase of concatenated parallelism}{8}}
\newlabel{cwriteback}{{6}{8}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6}Write back phase of concatenated parallelism}{8}}
\@writefile{toc}{\contentsline {section}{\numberline {V}Parallel $K$-means Using KD tree}{8}}
\newlabel{sec:parallelKDTree}{{V}{8}}
\newlabel{kdtree}{{V}{9}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces KD-tree with data points and centroids}}{9}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {5}{\ignorespaces Pruning using KD-tree}}{9}}
\newlabel{alg:kdGenAlg}{{5}{9}}
\newlabel{directmap}{{\unhbox \voidb@x \hbox {V-A}1}{9}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces KD Tree: Direct Mapping}}{9}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-A}}Concatenated Parallelism using KD-tree}{9}}
\newlabel{sec:kdconcat}{{\unhbox \voidb@x \hbox {V-A}}{9}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-A}1}Direct Access}{9}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-A}2}Distributed Access}{9}}
\newlabel{distributedmap}{{\unhbox \voidb@x \hbox {V-A}2}{10}}
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces KD Tree: Distributed Mapping}}{10}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-A}3}Eliminating Memory Conflicts Using Replication}{10}}
\@writefile{toc}{\contentsline {section}{\numberline {VI}Empirical Evaluation}{10}}
\newlabel{sec:experiments}{{VI}{10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-A}}Datasets}{10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-B}}Benchmark Machines}{10}}
\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces Benchmark Hardware Specification}}{10}}
\newlabel{htable}{{II}{10}}
\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces Comparison of three clustering approaches on CPU}}{10}}
\newlabel{fig:comparisonCPU}{{12}{10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-C}}Comparison of Three Approaches on a 32-core CPU}{10}}
\newlabel{sec:expr1}{{\unhbox \voidb@x \hbox {VI-C}}{10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-D}}Comparison of Three Approaches on FERMI}{10}}
\newlabel{sec:expr2}{{\unhbox \voidb@x \hbox {VI-D}}{10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-E}}Performance Comparison of various Multi-core Hardware}{10}}
\newlabel{sec:expr3}{{\unhbox \voidb@x \hbox {VI-E}}{10}}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Comparison of three clustering approaches on FERMI}}{11}}
\newlabel{fig:comparisonFERMI}{{13}{11}}
\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Performance Comparison of various Hardware}}{11}}
\newlabel{fig:comparisonConcat}{{14}{11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-F}}Comparison of the Three KD-tree Algorithms}{11}}
\newlabel{sec:expr4}{{\unhbox \voidb@x \hbox {VI-F}}{11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-G}}Comparison of KD-tree and Basic Implementation of Concatenated Parallelism}{11}}
\newlabel{sec:expr5}{{\unhbox \voidb@x \hbox {VI-G}}{11}}
\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces Comparison of the Three KD-tree Algorithms}}{11}}
\newlabel{fig:comparisonKdtree}{{15}{11}}
\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces KD-tree vs Basic Implementation of CP}}{11}}
\newlabel{fig:comparisonKdtreeWithNormal}{{16}{11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-H}}Comparison of KD-tree and Basic Implementation for Varying Dimensions}{11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-I}}Discussion}{11}}
\bibstyle{IEEEtran}
\bibdata{ref}
\bibcite{Lloyd}{1}
\bibcite{ensem2008}{2}
\bibcite{ensem2009}{3}
\bibcite{Andrew}{4}
\bibcite{cuda}{5}
\bibcite{oclspec}{6}
\bibcite{oclprgramming}{7}
\bibcite{oclbestpractices}{8}
\bibcite{Shalom2008}{9}
\bibcite{Hall2004}{10}
\bibcite{Cao}{11}
\bibcite{darjen}{12}
\bibcite{darjen1}{13}
\bibcite{darjen2}{14}
\bibcite{Che2008}{15}
\bibcite{Li}{16}
\bibcite{Fang2008}{17}
\bibcite{Debunk}{18}
\bibcite{Alsabti}{19}
\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces KD-tree vs Basic Implementation for Varying Dimensions}}{12}}
\newlabel{fig:comparisonKddimension}{{17}{12}}
\@writefile{toc}{\contentsline {section}{\numberline {VII}Conclusion}{12}}
\@writefile{toc}{\contentsline {section}{References}{12}}
